# Survey years to load; each year has its own raw CSV under ./data/.
years <- c(103, 104, 105, 106)

# Collect one filtered data frame per year and bind them once at the end,
# instead of growing all.df with rbind() on every iteration.
yearly.dfs <- vector("list", length(years))
for (i in seq_along(years)) {
  message(years[i])
  file.name <- paste0("./data/", years[i], "年來臺旅客消費及動向調查(原始資料).csv")
  # The raw files are read as BIG5-encoded text.
  df <- read.csv(file.name, fileEncoding = "BIG5")
  df <- df %>%
    select(
      stay, freq, purp1, purp2, type, prepay, pmoney, pdollar, airf,
      pit1, pit2, pit3, pit4, pit5, money, dollar,
      money1, p1, money2, p2, money3, p3, money4, p4, money5, p5, money6, p6,
      m601, m602, m603, m604, m605, m606, m607, m608, m609, m610,
      act01, act02, act03, act04, act05, act06, act07, act08, act09, act10,
      act11, act12, act13, act14, act15, act16, act17, act18, act19,
      nation, age, income, educ, occup, gender
    ) %>%
    # id = year followed by the zero-padded row position in the raw file;
    # it is assigned before filtering so it still points at the raw row.
    mutate(id = sprintf("%d%04d", years[i], row_number())) %>%
    # Keep respondents whose first or second purpose code equals 1.
    filter(purp1 == 1 | purp2 == 1)
  df$year <- years[i]
  yearly.dfs[[i]] <- df
}
all.df <- do.call(rbind, yearly.dfs)
## 103
## 104
## 105
## 106
# Cache the combined data frame on disk, then reload it under the name
# used by the rest of the analysis.
saveRDS(all.df, file = "alldf.rds")
alldf <- readRDS(file = "./alldf.rds")

# Quick inspection: dimensions first, then column types.
dim(alldf)
## [1] 23989    65
str(alldf)
## 'data.frame':    23989 obs. of  65 variables:
##  $ stay   : int  12 6 6 7 7 17 7 7 5 5 ...
##  $ freq   : int  4 1 1 1 1 2 1 1 1 1 ...
##  $ purp1  : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ purp2  : int  99 99 99 99 99 99 99 99 99 99 ...
##  $ type   : int  3 1 1 1 1 5 1 1 1 1 ...
##  $ prepay : int  1 1 1 1 1 2 1 1 1 1 ...
##  $ pmoney : chr  "25000" "4000" "4000" "3880" ...
##  $ pdollar: chr  "16" "2" "2" "2" ...
##  $ airf   : chr  "2" "1" "1" "1" ...
##  $ pit1   : chr  "1" "1" "1" "1" ...
##  $ pit2   : chr  "0" "1" "1" "1" ...
##  $ pit3   : chr  "0" "1" "1" "1" ...
##  $ pit4   : chr  "0" "1" "1" "1" ...
##  $ pit5   : chr  "0" "1" "1" "1" ...
##  $ money  : int  35000 10000 8000 1500 4500 50000 5000 7000 10000 4000 ...
##  $ dollar : chr  "16" "2" "2" "2" ...
##  $ money1 : int  0 0 0 0 0 35000 0 0 0 0 ...
##  $ p1     : chr  "." "." "." "." ...
##  $ money2 : int  12250 0 0 0 0 5000 0 0 0 0 ...
##  $ p2     : chr  "2" "." "." "." ...
##  $ money3 : int  12250 0 0 0 0 7500 0 0 0 0 ...
##  $ p3     : chr  "2" "." "." "." ...
##  $ money4 : int  7000 0 0 0 0 2500 0 0 0 0 ...
##  $ p4     : chr  "2" "." "." "." ...
##  $ money5 : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ p5     : chr  "." "." "." "." ...
##  $ money6 : int  3500 10000 8000 1500 4500 0 5000 7000 10000 4000 ...
##  $ p6     : chr  "2" "1" "1" "1" ...
##  $ m601   : int  0 1000 0 0 1800 0 0 1400 0 0 ...
##  $ m602   : int  0 4000 0 0 0 0 0 0 0 1200 ...
##  $ m603   : int  350 0 1600 0 0 0 0 350 0 800 ...
##  $ m604   : int  0 0 0 0 900 0 0 1750 0 1600 ...
##  $ m605   : int  3150 5000 2400 1500 1800 0 2500 3500 3000 400 ...
##  $ m606   : int  0 0 1600 0 0 0 500 0 3000 0 ...
##  $ m607   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ m608   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ m609   : int  0 0 2400 0 0 0 2000 0 4000 0 ...
##  $ m610   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ act01  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ act02  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ act03  : int  0 1 0 0 0 0 1 1 0 0 ...
##  $ act04  : int  1 0 0 0 0 0 0 0 0 0 ...
##  $ act05  : int  0 0 0 0 0 0 1 0 0 0 ...
##  $ act06  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ act07  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ act08  : int  1 1 1 1 1 0 1 1 1 1 ...
##  $ act09  : int  0 1 1 1 1 0 1 1 1 1 ...
##  $ act10  : int  1 1 1 1 1 0 0 0 1 1 ...
##  $ act11  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ act12  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ act13  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ act14  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ act15  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ act16  : int  0 1 1 0 0 1 0 1 1 0 ...
##  $ act17  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ act18  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ act19  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ nation : int  2 2 2 2 2 1 2 2 2 2 ...
##  $ age    : int  3 6 6 5 6 5 7 2 3 3 ...
##  $ income : int  2 8 8 8 8 7 8 2 3 2 ...
##  $ educ   : int  3 3 2 3 2 3 2 2 2 3 ...
##  $ occup  : int  7 12 12 12 12 3 12 5 8 5 ...
##  $ gender : int  1 2 1 2 1 1 2 2 1 2 ...
##  $ id     : chr  "1030001" "1030002" "1030003" "1030004" ...
##  $ year   : num  103 103 103 103 103 103 103 103 103 103 ...
# Convert to a tibble (as_tibble() replaces the deprecated as.tibble()).
alltb <- as_tibble(alldf)
# The raw character columns use "." as a placeholder (see the str() output
# for p1..p6); turn it into NA before the numeric conversion.
alltb[alltb == "."] <- NA
# Put the identifier columns first.
alltb <- alltb %>%
  select(id, year, everything())

# alltb

# Coerce every column to numeric, one column at a time. apply() on a data
# frame would first go through as.matrix(), which format()s numeric columns
# into padded, rounded strings before they are parsed back.
alltb[] <- lapply(alltb, function(x) as.numeric(as.character(x)))

# Left empty, as in the original script.
cols <- c()
alltb
# glimpse(alltb)
# NA & double
  • 99 = 未回答

0.1 Dep. Var

0.1.1 Exchange Rate problem

# Summarise pmoney within each pdollar category: group size, log of the
# group mean, and standard deviation. mean() and sd() are called without
# na.rm, so a group containing a missing pmoney yields NA.
PreSum <- alltb %>%
  group_by(pdollar) %>%
  summarise(
    n = n(),
    mean_pmoney = log(mean(pmoney)),
    sd = sd(pmoney)
  )
PreSum

# One point per pdollar category, on the log scale computed above.
ggplot(PreSum, aes(x = pdollar, y = mean_pmoney)) +
  geom_point() +
  labs(
    x = "Dollar(Category)",
    y = "Mean_Pmoney(log)",
    title = "Before Fixing XR"
  )
## Warning: Removed 1 rows containing missing values (geom_point).

Dollar_year106_document

# nation
# log(pmoney) against the nation code, one panel per pdollar category,
# points coloured by gender. Rows with a missing pdollar are dropped first.
alltb %>%
  filter(!is.na(pdollar)) %>%
  group_by(pdollar) %>%
  ggplot(aes(x = nation, y = log(pmoney))) +
  geom_point(aes(color = as.factor(gender))) +
  facet_wrap(~pdollar) +
  labs(
    x = "Nation",
    y = "Pmoney(log)",
    title = "NATION: Before Fixing XR"
  )

# occup
# log(pmoney) against the occupation code, one panel per pdollar category,
# points coloured by gender. Rows with a missing pdollar are dropped first.
alltb %>%
  filter(!is.na(pdollar)) %>%
  group_by(pdollar) %>%
  ggplot(aes(x = occup, y = log(pmoney))) +
  geom_point(aes(color = as.factor(gender))) +
  facet_wrap(~pdollar) +
  labs(
    x = "Occupation",
    y = "Pmoney(log)",
    title = "Occupation: Before Fixing XR"
  )

# Same occupation plot, additionally excluding rows whose occup code is 99.
alltb %>%
  filter(!is.na(pdollar), occup != 99) %>%
  group_by(pdollar) %>%
  ggplot(aes(x = occup, y = log(pmoney))) +
  geom_point(aes(color = as.factor(gender))) +
  facet_wrap(~pdollar) +
  labs(
    x = "Occupation",
    y = "Pmoney(log)",
    title = "Occup filter out 99"
  )

Occup_year106_document